import json
import os
import math
from collections import defaultdict
from collections import Counter

def count_results(directory):
    """Collect every recorded answer for each question across result files.

    Scans ``directory`` (non-recursively) for entries whose name ends with
    ``_val.json``. Each matching file is parsed as JSON and iterated; every
    item must provide a ``"question_id"`` and an ``"answer"`` key.

    Files are processed in sorted filename order. ``os.listdir`` returns
    entries in arbitrary order, so without sorting the order of the answers
    collected for a question could differ between runs or machines.

    Args:
        directory: Path of the directory containing the ``*_val.json`` files.

    Returns:
        A ``defaultdict(list)`` mapping ``str(question_id)`` to the list of
        answers found for that question, ordered by filename and then by
        position within each file.

    Raises:
        KeyError: If an item lacks ``"question_id"`` or ``"answer"``.
        Errors raised by ``os.listdir``, ``open`` and ``json.load`` (missing
        directory, unreadable file, malformed JSON) propagate unchanged.
    """
    results = defaultdict(list)

    # Sort so that the aggregation order does not depend on the filesystem.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('_val.json'):
            continue

        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # The file is fully parsed and closed at this point.
        for item in data:
            # IDs are normalised to str so that 1 and "1" share one bucket.
            results[str(item["question_id"])].append(item["answer"])

    return results

'''
def calculate_variance(answers):
    counts = Counter(answers)
    total = sum(counts.values())

    if total == 0:
        return 0

    frequencies = list(counts.values())
    mean_frequency = sum(frequencies) / len(frequencies)
    variance = sum((freq - mean_frequency) ** 2 for freq in frequencies) / len(frequencies)
    return variance
'''


def write_results_to_file(results, output_file):
    """Write a per-question report ranked by how many distinct answers exist.

    The "uncertainty" of a question is the number of distinct values among
    its answers. Questions are written most-uncertain first; questions with
    the same uncertainty keep the order in which ``results`` yields them
    (the sort is stable).

    Args:
        results: Mapping of question ID to the list of answers collected for
            it. Answers must be hashable.
        output_file: Path of the text file to create or overwrite (UTF-8).
            One line is written per question.
    """
    ranked = sorted(
        (
            (qid, len(set(answer_list)), answer_list)
            for qid, answer_list in results.items()
        ),
        key=lambda row: row[1],
        reverse=True,
    )

    with open(output_file, 'w', encoding='utf-8') as out:
        out.writelines(
            f"Question ID: {qid}, Uncertainty: {distinct:.4f}, Answers: {answer_list}\n"
            for qid, distinct, answer_list in ranked
        )


'''
def calculate_entropy(answers):
    counts = Counter(answers)
    total = sum(counts.values())

    if total == 0:
        return 0.0

    entropy = 0.0
    for count in counts.values():
        p = count / total
        entropy += p * math.log2(p)

    return -entropy


def write_results_to_file(results, output_file):
    entropy_results = []
    for question_id, answers in results.items():
        entropy = calculate_entropy(answers)
        entropy_results.append((question_id, entropy, answers))

    entropy_results.sort(key=lambda x: x[1], reverse=True)

    with open(output_file, 'w', encoding='utf-8') as file:
        for question_id, entropy, answers in entropy_results:
            file.write(f"Question ID: {question_id}, Entropy: {entropy:.4f}, Answers: {answers}\n")
'''

# Locations used by this run: the directory scanned for '*_val.json' result
# files, and the summary report, which is written into that same directory.
directory_path = '/home/test/yxl/MCoT/textvqa/results/llava'
output_file_path = '/home/test/yxl/MCoT/textvqa/results/llava/results_summary.txt'

# Gather the answers per question, then write the ranked summary.
# NOTE(review): these statements run at module level, so they also execute
# whenever this file is imported.
result_counts = count_results(directory_path)
write_results_to_file(result_counts, output_file_path)
